/*
Project:				Perry Intergenerational
Author:					Victor Ronda
Original Date:			January 21, 2022
*/



*-------------------------------------------------------------------------------------*
*  Computing the Treatment Effect Estimates and p-values for Non-biological Children  *
*-------------------------------------------------------------------------------------*



*------------------------------- Preamble --------------------------------------*

clear all
set more off
set matsize 10000
set maxvar 32767, permanently
version 15.0


*** Setting environment and filepaths ***
global klmshare : env klmshare
global klmperry : env klmperry
global data /share/klmperry/TE/data
global emp /share/klmperry/multigen_revision/Empirics

*-------------------------------------------------------------------------------*

*** Load penalized logit and penalized probit programs ***
do $emp/Scripts/penalized_logit.do

*--------------------------------- Globals -------------------------------------*

global xvars iq ses male mwork
global sxvars iq ses mwork

global dvarlist1 ghealth2 nspecialedu nsusp narrested
global dvarlist2 
global dvarlist3 ntpreg
global dvarlist4 
global dvarlist5 femp  yedu ndivor


local alist1 "00"
local alist2 "18"
local alist3 "19"
local alist4 "21"
local alist5 "23"


forval i = 1/5 {
	global depvarlist`i'
	foreach dvar in ${dvarlist`i'} {
		foreach k in `alist`i'' {
			global depvarlist`i' "${depvarlist`i'} `dvar'`k'"
		}
	}
}

global depvarlist "$depvarlist1 $depvarlist2 $depvarlist3 $depvarlist4 $depvarlist5"


use $emp/Cleaned_data/children_nonbio.dta, clear


*-------------------------------------------------------------------------------*
*        Computing the Treatment Effect Estimates           					*
*-------------------------------------------------------------------------------*

program simx, rclass

cap gen streat = treat

*Compute a score for propensity of receiving treatment:
cap penlogit "streat" "$xvars" "" "predict pd, pr"

*Compute a score for propensity of being interviewed given the treatment status:
forval v = 0/1 {
	foreach k in 00 18 19 21 23 {
	
		cap su interviewed`k' if streat==`v'
		
		if r(mean)==1 {
			cap gen pi`k'`v' = 1 if interviewed`k'==1 
		}
		else {
			cap penlogit "interviewed`k'" "$xvars" "streat==`v'" "predict pi`k'`v' if interviewed`k'==1, pr"
		}
		
	}
}





*** Computing treatment effect estimates and t-statistics for each outcome variable ***

foreach depvar in $depvarlist {
qui {

local k = reverse(substr(reverse("`depvar'"), 1, 2)) //for subgroup analysis of participants' children at or over age k

//locals for gender of the children of the participants
local cm0 "0"
local cm1 "1"
local cm2 "0,1"

//locals for gender of the participant
local pm0 "0"
local pm1 "1"
local pm2 "0,1"

forval j = 0/2 {
	
	local dvar "`depvar'_m`j'"
	cap gen nmiss = (`dvar' <.) //indicator of non-missing data for the dependent variable

*** For AIPW estimates, generate non-missing-ness probabilities and conditional expectations of the outcome ***	

*Compute a score for propensity of having a non-missing outcome given the treatment and interview:	
	forval v = 0/1 {
		cap su nmiss if streat==`v' & interviewed`k'==1
		if r(mean) == 1{
			cap gen pn`v' = 1 if nmiss==1 & streat==`v'
		}
		else {
			cap penlogit "nmiss" "$xvars" "streat==`v' & interviewed`k'==1" "predict pn`v' if nmiss==1 & streat==`v', pr"
		}
	}

*Inverse probability weights		
cap gen a1 = streat/(pn1 * pi`k'1 * pd) if nmiss==1 & streat==1
cap replace a1 = 0 if nmiss==1 & streat==0
cap gen a0 = (1 - streat)/(pn0 * pi`k'0 * (1 - pd)) if nmiss==1 & streat==0
cap replace a0 = 0 if nmiss==1 & streat==1
cap gen a = a1 + a0

*OLS-based conditional expectations of the outcome to use in the AIPW estimator
local pm0 "0"
local pm1 "1"
forval i = 0/1 {
	forval v = 0/1 {
		cap count if inlist(male,`pm`i'') & `dvar' <. & streat==`v' & nmiss==1
		cap local count_obs = r(N)
		
		if `count_obs' > 3 {
			cap reg `dvar' $sxvars if inlist(male,`pm`i'') & nmiss==1 & streat==`v'
			cap predict `dvar'_aipw_`v'_`i' if inlist(male,`pm`i'') & inlist(nmiss,0,1)
		}
		else if `count_obs' >= 1 & `count_obs' <= 3 {
			 cap su `dvar' if inlist(male,`pm`i'') & nmiss==1 & streat==`v'
			 cap gen `dvar'_aipw_`v'_`i' = r(mean) if inlist(male,`pm`i'') & inlist(nmiss,0,1)
		}
		else {
			cap gen `dvar'_aipw_`v'_`i' = . if inlist(male,`pm`i'') & inlist(nmiss,0,1)
		}
		
	}
}
cap gen `dvar'_aipw_1 = `dvar'_aipw_1_1 if male==1
cap replace `dvar'_aipw_1 = `dvar'_aipw_1_0 if male==0
cap gen `dvar'_aipw_0 = `dvar'_aipw_0_1 if male==1
cap replace `dvar'_aipw_0 = `dvar'_aipw_0_0 if male==0

*AIPW estimator at the individual level, which will be averaged later
cap gen te_aipw = (`dvar'_aipw_1 + a1 * (`dvar' - `dvar'_aipw_1)) - (`dvar'_aipw_0 + a0 * (`dvar' - `dvar'_aipw_0)) if nmiss==1
cap replace te_aipw = `dvar'_aipw_1 - `dvar'_aipw_0 if nmiss==0

//locals for gender of the participant
local pm0 "0"
local pm1 "1"
local pm2 "0,1"
forval i = 0/2 {

	cap su `dvar' if inlist(male,`pm`i'')
	cap return scalar `depvar'_nmisscount_`j'`i' = r(N)
	
	cap su `dvar' if inlist(male,`pm`i'') & streat==0
	cap return scalar `depvar'_cmean_`j'`i' = r(mean)
	
	cap su `dvar' if inlist(male,`pm`i'') & streat==1
	cap return scalar `depvar'_tmean_`j'`i' = r(mean)

*** UDIM (unconditional difference in means) ***	
	
	cap su streat if inlist(male,`pm`i'') & nmiss==1
	if r(mean) > 0 & r(mean) < 1 {
		cap reg `dvar' streat if inlist(male,`pm`i'') & nmiss==1, cluster(sid)
		cap return scalar `depvar'_udimd_`j'`i' = _b[streat]
		if _rc==0 {
			if _b[streat]==0 {
				cap return scalar `depvar'_udimt_`j'`i' = 0
			}
			else {
				cap return scalar `depvar'_udimt_`j'`i' = _b[streat]/_se[streat]
			}
		}
	}
	
*** COLS (conditional ordinary least squares-based treatment effect estimate) ***
	
	cap su streat if inlist(male,`pm`i'') & nmiss==1
	if r(mean) > 0 & r(mean) < 1 {
		cap reg `dvar' streat $xvars if inlist(male,`pm`i'') & nmiss==1, cluster(sid)
		cap return scalar `depvar'_colsd_`j'`i' = _b[streat]
		if _rc==0 {
			if _b[streat]==0 {
				cap return scalar `depvar'_colst_`j'`i' = 0
			}
			else {
				cap return scalar `depvar'_colst_`j'`i' = _b[streat]/_se[streat]
			}
		}
	}
	
*** AIPW (augmented inverse probability weighting-based treatment effect estimate) ***

	cap reg te_aipw if inlist(male,`pm`i''), cluster(sid)
	cap return scalar `depvar'_aipwd_`j'`i' = _b[_cons]
	if _rc==0 {
		if _b[_cons]==0 {
			cap return scalar `depvar'_aipwt_`j'`i' = 0
		}
		else {
			cap return scalar `depvar'_aipwt_`j'`i' = _b[_cons]/_se[_cons]
		}
	}
	
		cap leebounds `dvar' streat if inlist(male,`pm`i''), select(anychild`k'a)
		cap mat leetable = r(table)
		if _rc==0 {
			cap return scalar `depvar'_lblld_`j'`i' = leetable[1,1]
			if leetable[1,1]==0 {
				cap return scalar `depvar'_lbllt_`j'`i' = 0
			}
			else {
				cap return scalar `depvar'_lbllt_`j'`i' = leetable[1,1]/leetable[2,1]
			}
			
			cap return scalar `depvar'_lbuld_`j'`i' = leetable[1,2]
			if leetable[1,2]==0 {
				cap return scalar `depvar'_lbult_`j'`i' = 0
			}
			else {
				cap return scalar `depvar'_lbult_`j'`i' = leetable[1,2]/leetable[2,2]
			}
		}	

}

foreach tempovar in nmiss pn0 pn1 a1 a0 a `dvar'_aipw_1_1 `dvar'_aipw_1_0 `dvar'_aipw_0_1 `dvar'_aipw_0_0 `dvar'_aipw_1 `dvar'_aipw_0 te_aipw {
	qui cap drop `tempovar'
}

}
	
}
}

foreach tempovar in pd pi* {
	qui cap drop `tempovar'
}

cap drop streat

end

global storeresults ""
foreach depvar in $depvarlist {
	forval j = 0/2 {
		forval i = 0/2 {
			foreach result in nmisscount cmean tmean udimd udimt colsd colst aipwd aipwt  lblld lbllt lbuld lbult {
				global storeresults "$storeresults `depvar'_`result'_`j'`i' = r(`depvar'_`result'_`j'`i')"
			}
		}
	}
}




*** Main Estimates ***
simulate $storeresults, seed(12345) reps(1) noisily saving($emp/Results/estimates_nonbio_children.dta, replace every(1)): simx
*** Estimates for Permutation Tests***
permute treat $storeresults, seed(12345) reps(1000) saving($emp/Results/estimates_perm_nonbio_children.dta, replace every(20)): simx

*** Estimates for Bootstrap Tests***
bootstrap $storeresults, seed(12345) cluster(id) idcluster(idnew) nodrop reps(1000) saving($emp/Results/estimates_boot_nonbio_children.dta, replace every(20)): simx


*-------------------------------------------------------------------------------*
*                         Computing Asymptotic P-Values                         *
*-------------------------------------------------------------------------------*

use $emp/Results/estimates_nonbio_children.dta, clear
drop *_nmisscount_* *_cmean_* *_tmean_*

foreach dvar in $depvarlist {
foreach estm in udim cols aipw {
forval j = 0/2 {
forval i = 0/2 {

qui {

*Computing the asymptotic p-value based on asymptotic standard error
local aapval = normal(-abs(`dvar'_`estm't_`j'`i'[1]))

*Computing the asymptotic p-value based on bootstrap standard error
su `dvar'_`estm'd_`j'`i'
local abpval = normal(-abs(`dvar'_`estm'd_`j'`i'[1]/r(sd)))
if normal(-abs(`dvar'_`estm'd_`j'`i'[1]/r(sd)))==. {
local abpval = `aapval'
}

replace `dvar'_`estm'd_`j'`i' = `aapval' if _n==1
replace `dvar'_`estm't_`j'`i' = `abpval' if _n==1

}

}
}
}
}

keep if _n==1

save $emp/Results/asym_pval_nonbio_children, replace


*-------------------------------------------------------------------------------*
*                         Computing Permutation P-Values                         *
*-------------------------------------------------------------------------------*
use $emp/Results/estimates_nonbio_children.dta, clear
append using $emp/Results/estimates_perm_nonbio_children.dta



foreach dvar in $depvarlist {
forval j = 0/2 {
forval i = 0/2 {

qui {


count if _n > 1 & `dvar'_aipwd_`j'`i' <.
local bootB = r(N)
count if _n > 1 & `dvar'_aipwd_`j'`i' >= `dvar'_aipwd_`j'`i'[1]
local p_r = (r(N) + 1)/`bootB'
replace `dvar'_aipwd_`j'`i' =  `p_r' if _n==1

}
}
}
}


keep if _n==1


save $emp/Results/perm_pval_nonbio_children, replace
aa

*-------------------------------------------------------------------------------*
*                         Computing Bootstrap P-Values                         *
*-------------------------------------------------------------------------------*
use $emp/Results/estimates_nonbio_children.dta, clear
append using $emp/Results/estimates_boot_nonbio_children.dta

foreach dvar in $depvarlist {
foreach estm in aipw cols udim {
	forval j = 0/2 {
forval i = 0/2 {
egen temp=sd(`dvar'_`estm'd_`j'`i') if _n>1
egen `dvar'_`estm'se_`j'`i'=mean(temp)
drop temp
}
}
}
}

foreach dvar in $depvarlist {
foreach estm in aipw {
forval j = 0/2 {
forval i = 0/2 {

qui {
*Computing the bootstrap p-value (type II p-value) based on unstudentized test statistic

count if _n > 1 & `dvar'_`estm'd_`j'`i' <.
local bootB = r(N)
count if _n > 1 & `dvar'_`estm'd_`j'`i' >= 0
local p_r = (r(N) + 1)/`bootB'
count if _n > 1 & `dvar'_`estm'd_`j'`i' <= 0
local p_l = (r(N) + 1)/`bootB'

*Computing the bootstrap p-value (percentile-t p-value) based on studentized test statistic

count if _n > 1 & ( (`dvar'_`estm'd_`j'`i' - `dvar'_`estm'd_`j'`i'[1])/(`dvar'_`estm'd_`j'`i'/`dvar'_`estm't_`j'`i') >= `dvar'_`estm't_`j'`i'[1] | ((`dvar'_`estm'd_`j'`i' - `dvar'_`estm'd_`j'`i'[1])==0 & (`dvar'_`estm'd_`j'`i' - `dvar'_`estm'd_`j'`i'[1]) >= `dvar'_`estm't_`j'`i'[1] & `dvar'_`estm'd_`j'`i' <.) )
local q_r = (r(N) + 1)/`bootB'
count if _n > 1 & ( (`dvar'_`estm'd_`j'`i' - `dvar'_`estm'd_`j'`i'[1])/(`dvar'_`estm'd_`j'`i'/`dvar'_`estm't_`j'`i') <= `dvar'_`estm't_`j'`i'[1] | ((`dvar'_`estm'd_`j'`i' - `dvar'_`estm'd_`j'`i'[1])==0 & (`dvar'_`estm'd_`j'`i' - `dvar'_`estm'd_`j'`i'[1]) <= `dvar'_`estm't_`j'`i'[1] & `dvar'_`estm'd_`j'`i' <.) )
local q_l = (r(N) + 1)/`bootB'

replace `dvar'_`estm'd_`j'`i' =  min(`p_l',`p_r',1) if _n==1
replace `dvar'_`estm't_`j'`i' =  min(`q_l',`q_r',1) if _n==1

}

}
}
}
}
keep if _n==1

save $emp/Results/boot_pval_nonbio_children, replace

